Simply put, a network is a collection of connected objects. Networks represent interrelationships between actors of all sorts, and network analysis focuses on the relationships between those actors.
Components: nodes (vertices) and links (edges). “We refer to the objects as nodes or vertices, and usually draw them as points. We refer to the connections between the nodes as edges, and usually draw them as lines between points.”
network
different types of networks
# Packages ----
# Install any that are missing first, e.g. install.packages("igraph").
pkgs <- c("tidyverse", "igraph", "ggraph", "tidygraph")

# Attach every package named in `pkgs`; character.only = TRUE is needed because
# the names are passed as strings rather than bare symbols.
ld_pkgs <- lapply(pkgs, function(pkg) library(pkg, character.only = TRUE))

# Data ----
# Read the sample tweet table; the path is relative to the working directory.
df <- read_csv(file = "1. sample twitter data (SCOTUS).csv")
# Edge list ----
# One row per (retweeting account, retweeted account) pair. Rows whose
# retweet_user_handle is the literal string "\N" are dropped first; `weight`
# is the number of remaining rows of `df` that share the pair.
edges <- df %>%
  filter(retweet_user_handle != "\\N") %>%
  select(from = user_handle, to = retweet_user_handle) %>%
  count(from, to) %>%
  rename(weight = n)
head(edges)
## # A tibble: 6 x 3
## from to weight
## <chr> <chr> <int>
## 1 ___Danno ctvqp 1
## 2 __Dayo KHOU 1
## 3 __Resonance The_Tusker 1
## 4 _1_____________ Reuters 1
## 5 _Alibama BreakingNews 1
## 6 _Arvindh FactHive 1
# Node table ----
# One row per account that appears on either end of a retweet edge.
nodes <- tibble(handle = unique(c(edges$from, edges$to)))

# Out-degree: number of rows of `edges` in which the handle is the source,
# i.e. how many distinct accounts it retweeted. Counting with dplyr keeps
# `handle` a character column, so the join keys below have matching types
# (as.data.frame(table(...)) would produce a factor).
temp_out <- edges %>%
  count(handle = from) %>%
  rename(out_degree = n)

# In-degree: number of rows of `edges` in which the handle is the target,
# i.e. how many distinct accounts retweeted it.
temp_in <- edges %>%
  count(handle = to) %>%
  rename(in_degree = n)

# Attach both degrees to the node table. A handle that never appears on one
# side has no row in that table and comes back NA from the join, so the degree
# columns are filled with 0 *before* the flag is derived from them. Only the
# two degree columns are filled; `handle` is never rewritten.
nodes <- nodes %>%
  left_join(temp_out, by = "handle") %>%
  left_join(temp_in, by = "handle") %>%
  mutate(
    out_degree = coalesce(as.numeric(out_degree), 0),
    in_degree = coalesce(as.numeric(in_degree), 0),
    # 1 when the account was retweeted by more than 29 accounts, otherwise 0
    high_indegree = ifelse(in_degree > 29, 1, 0)
  )
head(nodes)
## # A tibble: 6 x 4
## handle out_degree in_degree high_indegree
## <chr> <dbl> <dbl> <dbl>
## 1 ___Danno 1 0 0
## 2 __Dayo 1 0 0
## 3 __Resonance 1 0 0
## 4 _1_____________ 1 0 0
## 5 _Alibama 1 0 0
## 6 _Arvindh 1 0 0
ggraph is an extension of ggplot2, so you can apply the ggplot grammar you already know to quickly learn how ggraph works.
Layouts
Nodes
Edges
# Build a directed tidygraph `tbl_graph` from the node and edge tables; this is
# the object handed to ggraph() for plotting.
rt_net <- tbl_graph(nodes, edges, directed = TRUE)
# Check its class, then print the graph summary.
class(rt_net)
## [1] "tbl_graph" "igraph"
print(rt_net)
## # A tbl_graph: 9470 nodes and 7139 edges
## #
## # A directed simple graph with 2377 components
## #
## # Node Data: 9,470 x 4 (active)
## handle out_degree in_degree high_indegree
## <chr> <dbl> <dbl> <dbl>
## 1 ___Danno 1 0 0
## 2 __Dayo 1 0 0
## 3 __Resonance 1 0 0
## 4 _1_____________ 1 0 0
## 5 _Alibama 1 0 0
## 6 _Arvindh 1 0 0
## # … with 9,464 more rows
## #
## # Edge Data: 7,139 x 3
## from to weight
## <int> <int> <int>
## 1 1 6624 1
## 2 2 6625 1
## 3 3 6626 1
## # … with 7,136 more rows
# Minimal plot: default layout, straight edges, one point per node.
ggraph(rt_net) + geom_edge_link() + geom_node_point()
## Using `nicely` as default layout
# Same network with the layout named explicitly. Edge width is mapped to the
# retweet weight, node size to in-degree, node colour to the high-in-degree
# flag, and only flagged nodes get a (repelled) label.
ggraph(rt_net, layout = "nicely") +
  geom_edge_link(mapping = aes(width = weight), alpha = 0.8) +
  scale_edge_width(range = c(0.2, 1)) +
  geom_node_point(
    mapping = aes(color = as.factor(high_indegree), size = in_degree)
  ) +
  geom_node_label(
    mapping = aes(filter = high_indegree == 1, label = handle),
    repel = TRUE
  )
You can specify a layout: https://igraph.org/r/doc/layout_.html
You can even use the facet function:
# Build the igraph object from the edge list and the node table. The first two
# columns of `edges` are the edge endpoints and the remaining column becomes an
# edge attribute; the first column of `nodes` supplies the vertex names and the
# remaining columns become vertex attributes.
# graph_from_data_frame() is the current name of the deprecated
# graph.data.frame() and takes the same arguments.
g <- graph_from_data_frame(d = edges, directed = TRUE, vertices = nodes)
# Other ways to construct an igraph object:
# graph_from_adjacency_matrix(), graph_from_edgelist(); see
# https://igraph.org/r/doc/
# Inspect the igraph object.
class(g)
## [1] "igraph"
g
## IGRAPH 3d4ef0b DNW- 9470 7139 --
## + attr: name (v/c), out_degree (v/n), in_degree (v/n),
## | high_indegree (v/n), weight (e/n)
## + edges from 3d4ef0b (vertex names):
## [1] ___Danno ->ctvqp __Dayo ->KHOU
## [3] __Resonance ->The_Tusker _1_____________->Reuters
## [5] _Alibama ->BreakingNews _Arvindh ->FactHive
## [7] _BannedInBoston->Occupied_Nation _bee_buzz_ ->MARGAlade
## [9] _chynadoll_ ->WSJ _CJustice_ ->peoplefor
## [11] _clairehuxtable->OBABL _eights ->jgalicot
## [13] _Ella_G ->HuffingtonPost _filo__ ->TheAtlantic
## + ... omitted several edges
# Print rows 100-110 and columns 1-5 of the graph's adjacency matrix: `g[]`
# yields the full matrix, which is then subset like any other matrix.
g[][100:110, 1:5]
## 11 x 5 sparse Matrix of class "dgCMatrix"
## ___Danno __Dayo __Resonance _1_____________ _Alibama
## 7NewsBrisbane . . . . .
## 7thgenyang . . . . .
## 8s . . . . .
## 900chuhay . . . . .
## 905GoTrainGirl . . . . .
## 99islandsummer . . . . .
## 99Pele . . . . .
## A_amusa . . . . .
## a_d_wood . . . . .
## A_Peabody . . . . .
## A_Sherie . . . . .
# Print the edge sequence of the graph (source->target vertex names).
print(E(g))
## + 7139/7139 edges from 3d4ef0b (vertex names):
## [1] ___Danno ->ctvqp __Dayo ->KHOU
## [3] __Resonance ->The_Tusker _1_____________->Reuters
## [5] _Alibama ->BreakingNews _Arvindh ->FactHive
## [7] _BannedInBoston->Occupied_Nation _bee_buzz_ ->MARGAlade
## [9] _chynadoll_ ->WSJ _CJustice_ ->peoplefor
## [11] _clairehuxtable->OBABL _eights ->jgalicot
## [13] _Ella_G ->HuffingtonPost _filo__ ->TheAtlantic
## [15] _grandhotel ->txgdb _HeavyP ->BreakingNews
## [17] _isabelacb ->bbcnews_ticker _JBrown22 ->_JMurray23
## [19] _jesseamaya ->JLLLOW _justBITTEN ->MischterX
## + ... omitted several edges
# Weight attribute of the first ten edges.
edge_attr(g, "weight")[1:10]
## [1] 1 1 1 1 1 1 1 1 1 1
# Print the vertex sequence of the graph (vertex names).
print(V(g))
## + 9470/9470 vertices, named, from 3d4ef0b:
## [1] ___Danno __Dayo __Resonance _1_____________
## [5] _Alibama _Arvindh _BannedInBoston _bee_buzz_
## [9] _chynadoll_ _CJustice_ _clairehuxtable _eights
## [13] _Ella_G _filo__ _grandhotel _HeavyP
## [17] _isabelacb _JBrown22 _jesseamaya _justBITTEN
## [21] _ldeassis_ _Nigerienne_ _Orwell _PDSweetTarts
## [25] _shaymt _skreetch_ _StuartGray _TheREBEL111
## [29] _TselNoelle_ _YeaSheDarkSkin 0missjones 0Yara0
## [33] 100Royd 12030312 123soxfan 124786Ashraf
## [37] 147DW 147maks 15say 1789kevin
## + ... omitted several vertices
# First ten values of each vertex attribute, starting with the vertex names.
vertex_attr(g, "name")[1:10]
## [1] "___Danno" "__Dayo" "__Resonance"
## [4] "_1_____________" "_Alibama" "_Arvindh"
## [7] "_BannedInBoston" "_bee_buzz_" "_chynadoll_"
## [10] "_CJustice_"
vertex_attr(g, "out_degree")[1:10]
## [1] 1 1 1 1 1 1 1 1 1 1
vertex_attr(g, "in_degree")[1:10]
## [1] 0 0 0 0 0 0 0 0 0 0
vertex_attr(g, "high_indegree")[1:10]
## [1] 0 0 0 0 0 0 0 0 0 0
# edge_attr(g)
# vertex_attr(g)
# you can set new attributes using: V(g)$media <- vector
# g <- simplify(g, remove.multiple = F, remove.loops = T)
# Compute one layout up front so every plot of `g` reuses the same coordinates.
# Available layouts: https://igraph.org/r/doc/layout_.html
l <- layout_nicely(g)
# l <- layout_on_grid(g)

# Retweet network. Vertices with in-degree above 29 are drawn larger, in red,
# and labelled with their handle; all others are small, grey and unlabelled.
# local() keeps the helper flag out of the global environment.
local({
  popular <- V(g)$in_degree > 29
  plot(
    g,
    layout = l,
    vertex.size = ifelse(popular, 1.5, 0.5),
    vertex.color = ifelse(popular, "red", "grey"),
    vertex.label = ifelse(popular, V(g)$name, NA),
    # vertex.label.color = ifelse(V(g)$media == 1, "red", "black"),
    vertex.label.cex = 1,
    vertex.label.dist = 0.5,
    edge.arrow.size = 0.5,
    edge.arrow.width = 0.5,
    edge.width = E(g)$weight,
    edge.color = "gray"
  )
})
# Density: the share of all possible edges that are actually present.
edge_density(g, loops = FALSE)
## [1] 7.961287e-05
# The same figure by hand for a directed network without self-loops:
# edges / (n * (n - 1)), where n is the number of vertices.
ecount(g) / (vcount(g) * (vcount(g) - 1))
## [1] 7.961287e-05
# Reciprocity: the share of ties that are reciprocated (directed network).
reciprocity(g)
## [1] 0.0002801513
# Node degrees and their distribution. degree() accepts mode = "in" for
# in-degree, "out" for out-degree, and "all" or "total" for total degree.
deg <- degree(g, mode = "all")
summary(deg)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 1.508 1.000 117.000
# Histogram of the log-transformed degrees, in 50 bins.
hist(log(deg), breaks = 50, main = "Histogram of node degree")
# Degree distribution: plot 1 - (cumulative distribution) against degree.
deg.dist <- degree_distribution(g, cumulative = TRUE, mode = "all")
plot(
  x = 0:max(deg),
  y = 1 - deg.dist,
  pch = 19,
  cex = 1.2,
  col = "orange",
  xlab = "Degree",
  ylab = "Cumulative Frequency"
)
#Centrality & centralization
# in_degree = degree(g, mode="in")
# centr_degree(g, mode="in", normalized=T)
#
# closeness(g, mode="all", weights=NA)
# centr_clo(g, mode="all", normalized=T)
#
# eigen_centrality(g, directed=T, weights=NA)
# centr_eigen(g, directed=T, normalized=T)
#
# betweenness(g, directed=T, weights=NA)
# edge_betweenness(g, directed=T, weights=NA)
# centr_betw(g, directed=T, normalized=T)
# Hubs and authorities ----
# Hub score of every vertex; weights = NA is passed so that the edge `weight`
# attribute is not used.
hs <- hub_score(g, weights = NA)[["vector"]]
summary(hs)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.01311 0.00000 1.00000
# The thirty highest hub scores, largest first.
sort(hs, decreasing = TRUE)[seq_len(30)]
## EricaMowry Rushboes_cigar JacquelynCalver NickASAVet
## 1.0000000 0.9982954 0.9895504 0.9692826
## TallyAnnaE AlessCullen dondanl Lone_Star_Dem
## 0.9588446 0.9586887 0.9586146 0.9586146
## BukowsKai shacker56 TomMHarrison Active_Artist
## 0.9585424 0.9585424 0.9585424 0.9502951
## aglac AKorst alexisairvin AmirZakii
## 0.9502951 0.9502951 0.9502951 0.9502951
## andriconthejob AngryBroads AOKiger BonnieNathan
## 0.9502951 0.9502951 0.9502951 0.9502951
## boomernerd boyjohn Brett_Myers BubbyKatz
## 0.9502951 0.9502951 0.9502951 0.9502951
## caroldeserio cbaumer10 cbrodrick crankydem
## 0.9502951 0.9502951 0.9502951 0.9502951
## damelio383 danero
## 0.9502951 0.9502951
# Network plot highlighting the strongest hubs: vertices whose hub score
# exceeds the cutoff are drawn larger and in red; labels are suppressed.
# NOTE(review): 0.9502951 looks like a rounded value copied from the printed
# top-30 table, so it is data-specific - confirm it still selects the intended
# vertices if the data changes.
local({
  top_hub <- hs > 0.9502951
  plot(
    g,
    layout = l,
    vertex.size = ifelse(top_hub, 1.5, 0.2),
    vertex.color = ifelse(top_hub, "red", "grey"),
    vertex.label = NA,
    # vertex.label.color = ifelse(V(g)$media == 1, "red", "black"),
    edge.arrow.size = 0.1,
    edge.arrow.width = 0.1,
    edge.width = E(g)$weight / 21,
    edge.color = "gray"
  )
})
# Authority score of every vertex; weights = NA is passed so that the edge
# `weight` attribute is not used.
as <- authority_score(g, weights = NA)[["vector"]]
summary(as)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0000000 0.0000000 0.0001321 0.0000000 1.0000000
# The thirty highest authority scores, largest first.
sort(as, decreasing = TRUE)[seq_len(30)]
## LOLGOP HuffingtonPost elizabethforma BarackObama
## 1.0000000000 0.0523046623 0.0505108573 0.0413084505
## peoplefor Salon Truthbuster HuffPostMiami
## 0.0109722462 0.0090084021 0.0089966623 0.0088326295
## MatthewCallaway ALW2 SirrK Voter99percent
## 0.0087546142 0.0087546142 0.0086786361 0.0086786361
## Foxfieldguy randiradio RWwatchMA WSJ
## 0.0086786361 0.0027201427 0.0009964087 0.0009016215
## HuffPostPol iSupremeCourt AriBerman sahilkapur
## 0.0008840206 0.0006221957 0.0005828569 0.0005793129
## BoldProgressive OrganicConsumer TPM ProgressMass
## 0.0005510085 0.0004968238 0.0004943750 0.0004938370
## TheLastWord JeffersonObama allout NBCNews
## 0.0004754221 0.0004710197 0.0004702283 0.0004668465
## 1BeautifulKarma TheMsVee83
## 0.0004665025 0.0004663393
# Network plot highlighting the strongest authorities: vertices whose authority
# score exceeds the cutoff are drawn larger and in red; labels are suppressed.
# NOTE(review): 0.0004668465 looks like a rounded value copied from the printed
# top-30 table, so it is data-specific - confirm it still selects the intended
# vertices if the data changes.
local({
  top_authority <- as > 0.0004668465
  plot(
    g,
    layout = l,
    vertex.size = ifelse(top_authority, 1.5, 0.2),
    vertex.color = ifelse(top_authority, "red", "grey"),
    vertex.label = NA,
    # vertex.label.color = ifelse(V(g)$media == 1, "red", "black"),
    edge.arrow.size = 0.1,
    edge.arrow.width = 0.1,
    edge.width = E(g)$weight / 21,
    edge.color = "gray"
  )
})
# Subgroups and communities ----
# Collapse the directed graph into an undirected one. Edges between the same
# pair of vertices are merged: their `weight` values are summed and every other
# edge attribute is dropped.
g.sym <- as.undirected(
  g,
  mode = "collapse",
  edge.attr.comb = list(weight = "sum", "ignore")
)
# Find cliques
# cliques(g.sym) # list of cliques
# sapply(cliques(g.sym), length) # clique sizes
# Cliques with the maximum number of nodes.
largest_cliques(g.sym)
## [[1]]
## + 3/9470 vertices, named, from c79aca7:
## [1] PositiveLiteCom ruraltweeter videodouble
##
## [[2]]
## + 3/9470 vertices, named, from c79aca7:
## [1] globaltvnews Beari8it TarSandsTwat
##
## [[3]]
## + 3/9470 vertices, named, from c79aca7:
## [1] IamTlewis BanksforJustice MBK_91
##
## [[4]]
## + 3/9470 vertices, named, from c79aca7:
## [1] usmanmanzoor omar_quraishi sami_ravian
#community detection
# ceb <- cluster_edge_betweenness(g)
# dendPlot(ceb, mode="hclust")